# ::: mimport :::
# ::: Michael Jankowski ::: April 7, 2016 ::: michael.jankowski@uni-oldenburg.de ::: www.michael-jankowski.de :::
# ::: version 0.1 :::
# ::: imports margins results from Stata ::: Works only in combination with the 'mexport.ado' ::: See: http://www.michael-jankowski.de/ado/mexport/mexport
# ::: ABSOLUTELY NO WARRANTY :::
# ::: options :::
# ::: model = "string"; string should be the same as used in Stata for exporting the margins results (see file("") suboption of mexport)
# ::: remove.files = logical; deletes the files after impoting them into R.
####################################################################################################################################################

mimport <- function(model = "output", remove.files = F){ # define function with defaults...

# Step 1: Read in margins results

# Step 1.1: check if file exists

if(!file.exists(paste0(model,"_results_mexport.csv"))){
stop("The file ",paste0(model,"_results_mexport.csv")," was not found.")
}

# Step 1.2: read data

data <- read.csv2(paste0(model,"_results_mexport.csv"), stringsAsFactors = F, header = F, row.names = NULL)

# Step 2: Extract model information from table

info <- data[1,1] # model information

data <- data[-1,] # delete model information

# Step 2.1: get info on average marginal effects

if(!grepl("marginal", info)){ # if not "marginal" --> predicted values
ame <- "" # ame is empty
} else{ # if marginal effects were computed
if(!grepl("continuous", info)) ame <- "cat" # if marginal effect is for a factor variable
if(grepl("continuous", info)) ame <- "cont" # if marginal effect is for a continuous variable
}

# Step 2.1.2: Get Factor Levels

#command <- stringr::str_extract(info, "margins.*?,")

#command <- gsub("margins|,","",command)

#command <- stringr::str_trim(command)

#command <- unlist(stringr::str_extract_all(command, "^.+?(#|##).+?([[:space:]]|$)|[[:space:]].+?(#|##).+?[[:space:]]"))

#command <- 
  
# Step 2.2: check if at was used

if(grepl("at=yes", info)){ # if at was used
at <- T # set at to TRUE
} else{ # else...
at <- F # set at to false
}

# Step 3: if at was used (at=T), read in legend

if(at){ # check for at

# Step 3.1: check if file exists

if(!file.exists(paste0(model,"_legend_mexport.csv"))){
stop("The file ",paste0(model,"_legend_mexport.csv")," was not found.\n")
}

# Step 3.2.: read in legend

legend <- read.csv2(paste0(model,"_legend_mexport.csv"), stringsAsFactors = F)

# Step 3.3.: adjust at legend so that it can be merged with values from margins table

legend$at <- gsub("[^0-9]+\\._at$","\\._at",legend$at)
} # end of at-check

#################################################################################
# Step 4: create table with data
#################################################################################

# Step 4.1: remove first two rows from data [why?...add comment here...]

data <- data[-c(1,2),]

# Step 4.2: if marginal effects were used: data has to be transformed

if(ame != ""){

currentName <- data[1,1] # reads in first factor level

data$level <- "" # creates empty string variable with factor level

for(i in 1:nrow(data)){ # loop through all observations --> this probably can be improved.

if(data[i,2] != ""){ # if col 2 (estimates) is not empty...
data$level[i] <- currentName # add current level to estimate
} else{ # if col 2 is empty --> new level exists
currentName <- data[i,1] # get new level
data$level[i] <- currentName # add level to current row
} #end of if
  
} # end of loop

data <- data[data[,2]!="",] # delete rows where estimates are not reported

#if(ame=="cat") data <- data[-grep("[0-9]+b\\.",data[,"level"]),] # if ame is for categorical variable, adjust level name
} # end of ame adjustment

# Step 4.3: extract confidence intervals

n_row <- nrow(data) # get number of rows

ci <- data[seq(2,n_row,2),2] # every second row = ci

ci <- strsplit(ci, ",") # split ci in lower and upper

ci <- Reduce(rbind,ci) # create dataset from list

ci <- apply(ci, 2, function(x) as.numeric(x)) # change from character to numeric

colnames(ci) <- c("llci","ulci") # set colnames for lower and upper ci

b <- as.numeric(data[seq(1,n_row,2),2]) # get point estimates ("b")

if(ame != "") ame_levels <- data[seq(1, n_row, 2), "level"] #  get levels for marginal effects

# Step 5: get factor levels

vars <- data[seq(1, n_row, 2), 1] # get row identifier from estout table

vars <- strsplit(vars, " # ") # split into variables

vars <- Reduce(rbind, vars) # make dataset

n_col <- ncol(vars) # number of variables

at_test <- NA # create test for var that contains at values

for(i in 1:n_col){ # loop through all variables

if(grepl("_at", vars[1,i])) at_test <- i # identify variable that contains "at"

} # end of loop

colnames(vars) <- paste0(1:n_col) # set colanmes (???)

count <- 1 # set counter

for(i in 1:n_col){ # loop thorugh variables

if(i==at_test & !is.na(at_test)){ # if var is at variable
colnames(vars)[at_test] <- "at" # name variable as at variable
} else{ # else call it a factor variable
colnames(vars)[i] <- paste0("Factor",count) # factor1, factor2, ...
count = count +1 # increase factor count
} # end of if
} # end of loop

if(!is.na(at_test)) colnames(vars)[at_test] <- "at" # rename at variable (???)

vars <- as.data.frame(vars, stringsAsFactors = F) # make dataset from vars

rownames(vars) <- 1:nrow(vars) # rename rownames to avoid errors / warnings when merging

# Step 6: merge data together

vars$ORDER <- 1:nrow(vars) # get order of observations

# Step 6.1: combine factors and at (if at used)

if(at){
final_vars <- merge(vars, legend) # merge legend and varnames if at used
} else{
final_vars <- vars # rename varnames if legend not used
}

final_vars <- final_vars[order(final_vars$ORDER),] # restore order of data

# Step 6.2: combine factors, ame levels and results

if(ame != ""){ # if marginal effects
margins_table <- data.frame(b, ci, final_vars, ame_levels) # combine all estimates, varnames and factor levels 
} else{ # if no me...
margins_table <- data.frame(b, ci, final_vars) # only combine estimates and varnames 
}

# Step 7: cleaning of table

margins_table <- margins_table[,-grep("ORDER|^at$",colnames(margins_table))]  # delete helping variables

if(ame != "") margins_table <- margins_table[!(margins_table$b == 0 & margins_table$llci == 0 & margins_table$ulci == 0),]

# Step 8: return results

if(remove.files){ # if files should be removed...
warning("Files are removed!")
file.remove(paste0(model,"_results_mexport.csv"))
if(at) file.remove(paste0(model,"_legend_mexport.csv"))
}

mplot <- plot.mimport(margins_table)

margins_table <- list(mimport = margins_table, plot = mplot)

class(margins_table) <- "mimport"

return(margins_table) # return table
} # end of function

########################################################################################################################
########################################################################################################################
########################################################################################################################

marginsplot <- function(data = NULL, # define function
x = NULL, # string for x axis
y = "b", # point estimate
by = NULL, # for different lines
plot_by = NULL, # for facetting
point = F, # use point estimates
line = T, # use lines (only with continuous x, see below)
ci = T, # plot confidence intervals?
xlab = NULL, # label for x axis
ylab = NULL, # label for y axis
title = NULL, # title for plot
subtitle = NULL, # subtitle (requires newest ggplot2 version!)
guides = T, # report legend?
alpha = .1, # alpha for ci ribbons
scales.free = F, # free scales when facetting
linetype = NULL, # different linetype of lines
shape = NULL, # different shapes of points
by_factor = F, # if by variable should be treated as factor
legend = "bottom"
){

# check for errors...

if(!is.data.frame(data)) stop("data is not a data.frame.") # object must be data.frame

if(!require(ggplot2)) stop("Please install the package 'ggplot2'.") # ggplot2 has to be installed

# check for titles

if(!is.null(subtitle) & is.null(title)){ # subtitle only allowed, when title is specified: otherwise subtitle --> title
warning("'title' is empty, but 'subtitle' specified. Will use subtitle as plot title.")
title <- subtitle
subtitle <- NULL
}

if(!is.null(by)){
if(by_factor) data[,by] <- as.factor(data[,by])
}

# is x a factor?

factor_test = F

if(is.factor(data[,x]) | is.character(data[,x])) factor_test = T

if(factor_test){
point = T
line = F
}

if(point) line <- F
if(line) point <- F

if(is.null(by)) p <- ggplot(data, aes_string(x = x, y = y)) # create plot layer

if(!is.null(by)) p <- ggplot(data, aes_string(x = x, y = y, group = by)) 

if(point){
p <- p + geom_point() # add points
if(!is.null(shape)) p <- p + aes_string(shape = shape)
}

if(line){
p <- p + geom_line() # add lines
if(!is.null(linetype)) p <- p + aes_string(linetype = linetype)
}

if(ci & point) p <- p + geom_pointrange(aes_string(ymin="llci", ymax="ulci")) # set ci for points

if(!is.null(by)) p <- p + aes_string(color = by) # set color for by variable

if(ci & line){ # set ci for line
if(is.null(by)) p <- p + geom_ribbon(aes_string(ymin="llci", ymax="ulci"), alpha = alpha) # if no by...just set alpha of ribbon...
if(!is.null(by)) p <- p + geom_ribbon(aes_string(ymin="llci", ymax="ulci"), colour = NA, alpha = alpha) # if by: suppress line around the ci
}

if(ci & line & !is.null(by)) p <- p + aes_string(fill = by) # fill ribbon (could that be added above?)

if(scales.free){ #if scales should be free
scales.free = "free" # define require string
} else{ # else set object to NULL
scales.free = NULL
}

if(!is.null(plot_by)) p <- p + facet_wrap(as.formula(paste("~", plot_by)), scales = scales.free) # facetting

if(!is.null(title) & is.null(subtitle)) p <- p + ggtitle(title) # add title

if(!is.null(title) & !is.null(subtitle)) p <- p + ggtitle(title, subtitle = subtitle) # add subtitle

if(!is.null(xlab)) p <- p + xlab(xlab) # add xlab

if(!is.null(ylab)) p <- p + ylab(ylab) # add ylab

p <- p + theme_bw() # use bw theme

if(guides){
p <- p + theme(legend.position = legend)
} else{
p <- p + guides(color = F, fill = F, linetype = F, shape = F) # suppress guides
}

return(p) # return plot
}

# renaming factors

rename_factors <- function(model = NULL, new_names = NULL){

get_factor_vars <- grep("Factor[0-9]+", colnames(model))

if(length(get_factor_vars) != length(new_names)) stop("names must have same length as number of factors in data.frame.")

for(i in 1:length(get_factor_vars)){
colnames(model)[colnames(model) == paste0("Factor",i)] <- new_names[i]
}

model 
}

#######################################################################################
#######################################################################################
#######################################################################################

plot.mimport <- function(data, ...){

if(class(data) == "mimport") data <- data$mimport
if(class(data) != "data.frame") stop("No data.frame provided.")

n_factors <- sum(grepl("Factor[0-9]+",colnames(data)))  

if(n_factors > 3) stop("more than 3 factors cannot be plotted.")

if(n_factors > 0){
n_levels <- sapply(1:n_factors, function(x) length(unique(data[,paste0("Factor",x)])))
factor_names <- paste0("Factor",1:n_factors)
factor_names <- factor_names[order(n_levels, decreasing = T)]
first_factor <- factor_names[1]
if(n_factors > 1) second_factor <- factor_names[2]
if(n_factors > 2) third_factor <- factor_names[3]
}

# all cont variables

other_vars <- colnames(data)[!grepl("Factor[0-9]+|^b$|^llci$|^ulci$",colnames(data))]

# all cont vars without missing values

which_other_vars <- sapply(other_vars, function(x) any(!is.na(data[,x])))

# when cont vars exists, only keep those with non-missing

if(length(other_vars)>0){ 
  cont_vars <- other_vars[which_other_vars]
  
  more_than_one_level <- sapply(cont_vars, function(x) length(unique(data[,x]))>1)
  
  cont_vars <- cont_vars[more_than_one_level]
  
  n_cont <- sum(more_than_one_level)
  } else{
  n_cont <- 0
}

if(n_cont > 2) stop("Not more than 2 cont vars.")

if(n_cont > 0){

n_levels_cont <- sapply(cont_vars, function(x) length(unique(data[,x])))

x <- cont_vars[match(sort(n_levels_cont)[1],n_levels_cont)]

if(n_cont == 2){
  second_cont <- cont_vars[match(sort(n_levels_cont)[2],n_levels_cont)] 
}

if(n_cont>1){

  if(n_factors == 0) first_factor <- second_cont
  if(n_factors == 1) second_factor <- second_cont
  if(n_factors == 2) third_factor <- second_cont

  n_factors <- n_factors + 1
  
}
}

if(n_cont == 0){
  x <- first_factor
  if(n_factors>1)  first_factor <- second_factor
  if(n_factors>2)  second_factor <- third_factor; third_factor <- NULL
}

if(n_factors == 0){
  first_factor <- NULL
  second_factor <- NULL
  third_factor <- NULL
}

if(n_factors == 1){
  second_factor <- NULL
  third_factor <- NULL
  
  if(length(unique(data[,x]))<length(unique(data[,first_factor]))){
    x1 <- first_factor
    first_factor <- x
    x <- x1
  }
}

if(n_factors == 2){
  third_factor <- NULL
}

marginsplot(data = data,
                        x = x, # string for x axis
                        by = first_factor, # for different lines
                        plot_by = second_factor, # for facetting
                        linetype = third_factor, # different linetype of lines
                        shape = third_factor, # different shapes of points
                        by_factor = T,
                        ...)

}



